Multi Thread Usage

Multi Thread Matrix Multiply
#include <iostream>
#include <thread>
#include <vector>
using namespace std;
typedef vector<vector<int>> vector2d;
void Thread2F(vector<int> Ai, vector<int> Bj, int* Cij){
int n=Ai.size();
for(int k=0; k<n; ++k) *Cij+=Ai[k]*Bj[k];
}
void Thread1F(vector<int> Ai, vector2d B, vector<int>* Ci){
int n=Ai.size();
vector<thread> threads;
for(int j=0; j<n; ++j){
vector<int> Bj(n);
for(int i=0; i<n; ++i){
Bj[i]=B[i][j];
}
(*Ci)[j]=0;
threads.push_back(thread(Thread2F, Ai, Bj, &(*Ci)[j]));
}
for(int j=0; j<n; ++j) threads[j].join();
}
vector2d PSquareMatrixMultiply(vector2d A, vector2d B){
assert(A[0].size()==B.size() && A.size()==B[0].size());
int n=A.size();
vector<thread> threads;
vector2d C(n, vector<int>(n, 0));
for(int i=0; i<n; ++i){
threads.push_back(thread(Thread1F, A[i], B, &(C[i])));
}
for(int i=0; i<n; ++i) threads[i].join();
return C;
}
int main(void){
vector2d A={
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9},
{1, 2, 3, 4, 5, 6, 7, 8, 9}
};
vector2d B={
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
{9, 8, 7, 6, 5, 4, 3, 2, 1},
};
vector2d C=PSquareMatrixMultiply(A, B);
for(int i=0; i<C.size(); ++i){
for(int j=0; j<C[0].size(); ++j) std::cout<<C[i][j]<<' ';
std::cout<<'\n';
}
std::cout<<std::endl;
return 0;
}
분할 정복 멀티스레드 알고리즘에 의한 행렬의 곱셈
Strassen’s Divide and Conquer Algorithm with Multi Thread Programming

introduction to Algorithms 3rd Edition(한글본; Korean Trasistion) pg.814 참조
Multi Thread Merge Sort
#include <iostream>
#include <thread>
void merge(int* arr, int p, int q, int r){
int n1=q-p+1;
int n2=r-q;
int L[n1+1], R[n2+1];
int i, j;
for(i=0; i<n1; ++i){
L[i]=arr[p+i];
}
for(j=0; j<n2; ++j){
R[j]=arr[q+j+1];
}
L[n1]=INT_MAX; R[n2]=INT_MAX;
i=0; j=0;
for(int k=p; k<=r; ++k){
if(L[i]<=R[j]){
arr[k]=L[i];
i++;
} else {
arr[k]=R[j];
j++;
}
}
}
void threadMergeSort(int* arr, int p, int r){
if(p<r){
int q=(p+r)/2;
std::thread T(threadMergeSort, arr, p, q);
threadMergeSort(arr, q+1, r);
T.join();
merge(arr, p, q, r);
}
}
int main(void){
int A[]={8, 4, 6, 2, 5, 9, 7, 10, 1, 3};
int sz=sizeof(A)/sizeof(int);
threadMergeSort(A, 0, 9);
for(int i=0; i<sz; ++i) std::cout<<A[i]<<' ';
std::cout<<std::endl;
return 0;
}
위의 코드는 분할하는 경우만 병렬 처리한다.
merge 과정도 병렬 처리를 적용할 수 있다.

아래의 병합 정렬이 위의 병합정렬보다 병렬성이 훨씬 좋다.
#include <iostream>
#include <thread>
#include <algorithm>
#include <vector>
#define swap(a, b){ int tmp=a; a=b; b=tmp; }
using namespace std;
int BinarySearch(int x, vector<int> T, int p, int r){
int low=p, high=max(p, r+1);
while(low<high){
int mid=(low+high)/2;
if(x<=T[mid]) high=mid;
else low=mid+1;
}
return high;
}
void PMerge(vector<int> T, int p1, int r1, int p2, int r2, vector<int>* A, int p3){
int n1=r1-p1+1, n2=r2-p2+1;
if(n1<n2){
swap(p1, p2); swap(r1, r2); swap(n1, n2);
}
if(n1==0) return;
else{
int q1=(p1+r1)/2;
int q2=BinarySearch(T[q1], T, p2, r2);
int q3=p3+(q1-p1)+(q2-p2);
(*A)[q3]=T[q1];
thread Thread(PMerge, T, p1, q1-1, p2, q2-1, A, p3);
PMerge(T, q1+1, r1, q2, r2, A, q3+1);
Thread.join();
}
}
void PMergeSort(vector<int> A, int p, int r, vector<int>* B, int s){
int n=r-p+1;
if(n==1) (*B)[s]=A[p];
else {
vector<int> T(n);
int q1=(p+r)/2;
int q2=q1-p+1;
thread Thread(PMergeSort, A, p, q1, &T, 0);
PMergeSort(A, q1+1, r, &T, q2);
Thread.join();
PMerge(T, 0, q2-1, q2, n-1, B, s);
}
}
int main(void){
vector<int> A={8, 4, 6, 2, 5, 9, 7, 10, 1, 3};
int sz=A.size();
vector<int> B(sz);
PMergeSort(A, 0, 9, &B, 0);
for(int i=0; i<sz; ++i) std::cout<<B[i]<<' ';
std::cout<<std::endl;
}